{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "dc6f703f-214b-441f-af55-48a01abf5b8a",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig\n",
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "1a4e88c2-be8c-44cb-bdb4-dc05f9aab249",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_name_or_path = \"facebook/opt-6.7b\"\n",
    "quantization_config = GPTQConfig(\n",
    "    bits=4,\n",
    "    group_size=128,\n",
    "    dataset='wikitext2',\n",
    "    desc_act=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6c6501a0-4f01-435d-aa03-5aee461a6e1f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading shards: 100%|██████████████████████████████████████████████████████████| 2/2 [15:45<00:00, 472.87s/it]\n",
      "Loading checkpoint shards: 100%|████████████████████████████████████████████████████| 2/2 [00:03<00:00,  1.78s/it]\n",
      "Generating test split: 100%|████████████████████████████████████████| 4358/4358 [00:00<00:00, 73119.79 examples/s]\n",
      "Generating train split: 100%|████████████████████████████████████| 36718/36718 [00:00<00:00, 496327.51 examples/s]\n",
      "Generating validation split: 100%|█████████████████████████████████| 3760/3760 [00:00<00:00, 331942.39 examples/s]\n",
      "Quantizing model.decoder.layers blocks :   0%|                                             | 0/32 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:08<00:44,  8.94s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:17<00:35,  8.82s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:26<00:26,  8.80s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:35<00:17,  8.81s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:44<00:08,  8.87s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:26<00:00, 20.32s/it]\n",
      "Quantizing model.decoder.layers blocks :   3%|█▏                                   | 1/32 [01:31<47:05, 91.15s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:08<00:44,  8.85s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:17<00:35,  8.88s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:26<00:26,  8.89s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:35<00:17,  8.91s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:44<00:08,  8.99s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:27<00:00, 20.42s/it]\n",
      "Quantizing model.decoder.layers blocks :   6%|██▎                                  | 2/32 [03:02<45:43, 91.45s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:08<00:44,  8.97s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:36,  9.02s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:27<00:27,  9.05s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:36<00:18,  9.05s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:45<00:09,  9.11s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:28<00:00, 20.77s/it]\n",
      "Quantizing model.decoder.layers blocks :   9%|███▍                                 | 3/32 [04:36<44:35, 92.25s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:45,  9.09s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:36,  9.12s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:27<00:27,  9.14s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:36<00:18,  9.12s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:45<00:09,  9.15s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:29<00:00, 20.86s/it]\n",
      "Quantizing model.decoder.layers blocks :  12%|████▋                                | 4/32 [06:09<43:19, 92.84s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:45,  9.09s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:36,  9.12s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:27<00:27,  9.12s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:36<00:18,  9.12s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:45<00:09,  9.18s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:29<00:00, 20.93s/it]\n",
      "Quantizing model.decoder.layers blocks :  16%|█████▊                               | 5/32 [07:43<41:58, 93.26s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:45,  9.12s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:36,  9.18s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:27<00:27,  9.20s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:36<00:18,  9.18s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:46<00:09,  9.24s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:30<00:00, 21.14s/it]\n",
      "Quantizing model.decoder.layers blocks :  19%|██████▉                              | 6/32 [09:18<40:39, 93.81s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:46,  9.20s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:36,  9.21s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:27<00:27,  9.23s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:36<00:18,  9.22s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:46<00:09,  9.26s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:30<00:00, 21.32s/it]\n",
      "Quantizing model.decoder.layers blocks :  22%|████████                             | 7/32 [10:54<39:19, 94.39s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:46,  9.23s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:36,  9.24s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:27<00:27,  9.27s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:37<00:18,  9.26s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:46<00:09,  9.31s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:31<00:00, 21.38s/it]\n",
      "Quantizing model.decoder.layers blocks :  25%|█████████▎                           | 8/32 [12:30<37:57, 94.88s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:46,  9.30s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:37,  9.32s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:27<00:27,  9.32s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:37<00:18,  9.29s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:46<00:09,  9.34s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:31<00:00, 21.43s/it]\n",
      "Quantizing model.decoder.layers blocks :  28%|██████████▍                          | 9/32 [14:06<36:31, 95.30s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:46,  9.27s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:37,  9.30s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:27<00:27,  9.31s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:37<00:18,  9.31s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:46<00:09,  9.37s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:31<00:00, 21.48s/it]\n",
      "Quantizing model.decoder.layers blocks :  31%|███████████▎                        | 10/32 [15:42<35:04, 95.65s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:46,  9.31s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:37,  9.35s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:28<00:28,  9.36s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:37<00:18,  9.34s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:46<00:09,  9.40s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:31<00:00, 21.50s/it]\n",
      "Quantizing model.decoder.layers blocks :  34%|████████████▍                       | 11/32 [17:19<33:34, 95.95s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:46,  9.30s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:37,  9.35s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:28<00:28,  9.37s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:37<00:18,  9.33s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:46<00:09,  9.38s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:32<00:00, 21.59s/it]\n",
      "Quantizing model.decoder.layers blocks :  38%|█████████████▌                      | 12/32 [18:56<32:04, 96.24s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:46,  9.33s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:37,  9.38s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:28<00:28,  9.40s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:37<00:18,  9.36s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:46<00:09,  9.41s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:32<00:00, 21.69s/it]\n",
      "Quantizing model.decoder.layers blocks :  41%|██████████████▋                     | 13/32 [20:33<30:34, 96.56s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:46,  9.35s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:37,  9.37s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:28<00:28,  9.38s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:37<00:18,  9.36s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:46<00:09,  9.42s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:33<00:00, 21.94s/it]\n",
      "Quantizing model.decoder.layers blocks :  44%|███████████████▊                    | 14/32 [22:11<29:05, 97.00s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:46,  9.38s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:37,  9.39s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:28<00:28,  9.41s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:37<00:18,  9.38s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:47<00:09,  9.43s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:33<00:00, 22.07s/it]\n",
      "Quantizing model.decoder.layers blocks :  47%|████████████████▉                   | 15/32 [23:50<27:36, 97.44s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:46,  9.40s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:37,  9.39s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:28<00:28,  9.42s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:37<00:18,  9.40s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:47<00:09,  9.46s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:34<00:00, 22.20s/it]\n",
      "Quantizing model.decoder.layers blocks :  50%|██████████████████                  | 16/32 [25:29<26:06, 97.89s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:46,  9.32s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:18<00:37,  9.26s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:27<00:27,  9.27s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:37<00:18,  9.28s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:46<00:09,  9.33s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:32<00:00, 21.64s/it]\n",
      "Quantizing model.decoder.layers blocks :  53%|███████████████████▏                | 17/32 [27:06<24:24, 97.64s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:48,  9.72s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:19<00:39,  9.79s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:29<00:29,  9.84s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:39<00:19,  9.87s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:49<00:09,  9.94s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:36<00:00, 22.49s/it]\n",
      "Quantizing model.decoder.layers blocks :  56%|████████████████████▎               | 18/32 [28:47<23:03, 98.82s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:09<00:49,  9.98s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:40, 10.02s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:30<00:30, 10.03s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:40<00:20, 10.03s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:50<00:10, 10.10s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:38<00:00, 22.98s/it]\n",
      "Quantizing model.decoder.layers blocks :  59%|████████████████████▊              | 19/32 [30:31<21:43, 100.25s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:10<00:50, 10.19s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:41, 10.27s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:30<00:30, 10.25s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:40<00:20, 10.23s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:51<00:10, 10.33s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:39<00:00, 23.30s/it]\n",
      "Quantizing model.decoder.layers blocks :  62%|█████████████████████▉             | 20/32 [32:16<20:20, 101.73s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:10<00:51, 10.32s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:41, 10.39s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:31<00:31, 10.34s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:41<00:20, 10.33s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:51<00:10, 10.42s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:40<00:00, 23.37s/it]\n",
      "Quantizing model.decoder.layers blocks :  66%|██████████████████████▉            | 21/32 [34:02<18:52, 102.92s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:10<00:51, 10.33s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:41, 10.42s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:31<00:31, 10.39s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:41<00:20, 10.35s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:52<00:10, 10.44s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:40<00:00, 23.41s/it]\n",
      "Quantizing model.decoder.layers blocks :  69%|████████████████████████           | 22/32 [35:48<17:18, 103.81s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:10<00:51, 10.37s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:41, 10.41s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:31<00:31, 10.39s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:41<00:20, 10.36s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:52<00:10, 10.45s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:40<00:00, 23.46s/it]\n",
      "Quantizing model.decoder.layers blocks :  72%|█████████████████████████▏         | 23/32 [37:34<15:40, 104.49s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:10<00:51, 10.39s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:41, 10.44s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:31<00:31, 10.42s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:41<00:20, 10.39s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:52<00:10, 10.46s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:40<00:00, 23.47s/it]\n",
      "Quantizing model.decoder.layers blocks :  75%|██████████████████████████▎        | 24/32 [39:20<13:59, 104.98s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:10<00:51, 10.39s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:41, 10.45s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:31<00:31, 10.41s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:41<00:20, 10.38s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:52<00:10, 10.44s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:40<00:00, 23.44s/it]\n",
      "Quantizing model.decoder.layers blocks :  78%|███████████████████████████▎       | 25/32 [41:06<12:17, 105.30s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:10<00:51, 10.35s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:41, 10.42s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:31<00:31, 10.40s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:41<00:20, 10.37s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:52<00:10, 10.45s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:40<00:00, 23.41s/it]\n",
      "Quantizing model.decoder.layers blocks :  81%|████████████████████████████▍      | 26/32 [42:52<10:32, 105.50s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:10<00:52, 10.41s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:41, 10.44s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:31<00:31, 10.42s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:41<00:20, 10.41s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:52<00:10, 10.48s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:41<00:00, 23.54s/it]\n",
      "Quantizing model.decoder.layers blocks :  84%|█████████████████████████████▌     | 27/32 [44:38<08:48, 105.77s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:10<00:52, 10.43s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:41, 10.47s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:31<00:31, 10.45s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:41<00:20, 10.42s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:52<00:10, 10.50s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:41<00:00, 23.62s/it]\n",
      "Quantizing model.decoder.layers blocks :  88%|██████████████████████████████▋    | 28/32 [46:25<07:04, 106.05s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:10<00:52, 10.40s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:41, 10.46s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:31<00:31, 10.45s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:41<00:20, 10.41s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:52<00:10, 10.48s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:41<00:00, 23.63s/it]\n",
      "Quantizing model.decoder.layers blocks :  91%|███████████████████████████████▋   | 29/32 [48:12<05:18, 106.25s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:10<00:51, 10.37s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:41, 10.41s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:31<00:31, 10.39s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:41<00:20, 10.35s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:52<00:10, 10.43s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:41<00:00, 23.65s/it]\n",
      "Quantizing model.decoder.layers blocks :  94%|████████████████████████████████▊  | 30/32 [49:58<03:32, 106.35s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:10<00:51, 10.37s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:41, 10.41s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:31<00:31, 10.38s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:41<00:20, 10.36s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:52<00:10, 10.45s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:41<00:00, 23.79s/it]\n",
      "Quantizing model.decoder.layers blocks :  97%|█████████████████████████████████▉ | 31/32 [51:45<01:46, 106.55s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:10<00:51, 10.37s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:20<00:41, 10.41s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:31<00:31, 10.36s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:41<00:20, 10.30s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:51<00:10, 10.43s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [01:41<00:00, 23.68s/it]\n",
      "Quantizing model.decoder.layers blocks : 100%|███████████████████████████████████| 32/32 [53:32<00:00, 100.38s/it]\n",
      "/home/david/anaconda3/envs/peft/lib/python3.10/site-packages/transformers/modeling_utils.py:5055: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
      "  warnings.warn(\n",
      "`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.\n"
     ]
    }
   ],
   "source": [
    "quant_model = AutoModelForCausalLM.from_pretrained(\n",
    "    model_name_or_path,\n",
    "    quantization_config=quantization_config,\n",
    "    device_map=\"auto\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "98f580f8-9acc-4b54-9fe3-2e19e0f41539",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'training': True,\n",
       " '_parameters': OrderedDict(),\n",
       " '_buffers': OrderedDict([('qweight',\n",
       "               tensor([[-1533367701,  2022282410, -1182414405,  ..., -1537760937,\n",
       "                          968190105, -1218869078],\n",
       "                       [-1973762678, -1990888091,  1451669112,  ...,  1767327094,\n",
       "                         1485273242, -1769109111],\n",
       "                       [ -890521657,  1705355194,  2042256023,  ...,  1401453177,\n",
       "                         -963081656, -1212573545],\n",
       "                       ...,\n",
       "                       [-1192544404,   697191045,  1432856694,  ...,  1967820506,\n",
       "                        -1482119368, -1787262823],\n",
       "                       [-1736931943, -1753576812,  2027985786,  ..., -1757504693,\n",
       "                         2090308806, -1987483739],\n",
       "                       [ 1549191317,  1151064006, -1735993498,  ..., -1317428394,\n",
       "                        -1182375288, -1199925157]], device='cuda:0', dtype=torch.int32)),\n",
       "              ('qzeros',\n",
       "               tensor([[2004318071, 2004318071, 2004318071,  ..., 2004318071, 2004318071,\n",
       "                        2004318071],\n",
       "                       [2004318071, 2004318071, 2004318071,  ..., 2004318071, 2004318071,\n",
       "                        2004318071],\n",
       "                       [2004318071, 2004318071, 2004318071,  ..., 2004318071, 2004318071,\n",
       "                        2004318071],\n",
       "                       ...,\n",
       "                       [2004318071, 2004318071, 2004318071,  ..., 2004318071, 2004318071,\n",
       "                        2004318071],\n",
       "                       [2004318071, 2004318071, 2004318071,  ..., 2004318071, 2004318071,\n",
       "                        2004318071],\n",
       "                       [2004318071, 2004318071, 2004318071,  ..., 2004318071, 2004318071,\n",
       "                        2004318071]], device='cuda:0', dtype=torch.int32)),\n",
       "              ('scales',\n",
       "               tensor([[0.0039, 0.0035, 0.0035,  ..., 0.0030, 0.0028, 0.0044],\n",
       "                       [0.0032, 0.0030, 0.0036,  ..., 0.0034, 0.0028, 0.0033],\n",
       "                       [0.0038, 0.0029, 0.0039,  ..., 0.0043, 0.0025, 0.0035],\n",
       "                       ...,\n",
       "                       [0.0033, 0.0034, 0.0033,  ..., 0.0032, 0.0038, 0.0030],\n",
       "                       [0.0030, 0.0028, 0.0027,  ..., 0.0028, 0.0038, 0.0026],\n",
       "                       [0.0034, 0.0035, 0.0034,  ..., 0.0027, 0.0027, 0.0042]],\n",
       "                      device='cuda:0', dtype=torch.float16)),\n",
       "              ('g_idx',\n",
       "               tensor([ 0,  0,  0,  ..., 31, 31, 31], device='cuda:0', dtype=torch.int32)),\n",
       "              ('bias',\n",
       "               tensor([ 0.0061,  0.0129, -0.0215,  ...,  0.0157, -0.0144,  0.0106],\n",
       "                      device='cuda:0', dtype=torch.float16))]),\n",
       " '_non_persistent_buffers_set': set(),\n",
       " '_backward_pre_hooks': OrderedDict(),\n",
       " '_backward_hooks': OrderedDict(),\n",
       " '_is_full_backward_hook': None,\n",
       " '_forward_hooks': OrderedDict(),\n",
       " '_forward_hooks_with_kwargs': OrderedDict(),\n",
       " '_forward_hooks_always_called': OrderedDict(),\n",
       " '_forward_pre_hooks': OrderedDict(),\n",
       " '_forward_pre_hooks_with_kwargs': OrderedDict(),\n",
       " '_state_dict_hooks': OrderedDict(),\n",
       " '_state_dict_pre_hooks': OrderedDict(),\n",
       " '_load_state_dict_pre_hooks': OrderedDict(),\n",
       " '_load_state_dict_post_hooks': OrderedDict(),\n",
       " '_modules': OrderedDict(),\n",
       " 'infeatures': 4096,\n",
       " 'outfeatures': 4096,\n",
       " 'bits': 4,\n",
       " 'group_size': 128,\n",
       " 'maxq': 15,\n",
       " 'half_indim': 2048,\n",
       " 'use_cuda_fp16': True,\n",
       " 'wf': tensor([[ 0,  4,  8, 12, 16, 20, 24, 28]], dtype=torch.int32),\n",
       " 'kernel_switch_threshold': 128,\n",
       " 'autogptq_cuda_available': False,\n",
       " 'autogptq_cuda': None,\n",
       " 'trainable': False,\n",
       " 'device': device(type='cuda', index=0)}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "quant_model.model.decoder.layers[0].self_attn.q_proj.__dict__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "743ce82b-71d3-4191-a87b-ed596a3cdb00",
   "metadata": {},
   "outputs": [],
   "source": [
    "quant_model.save_pretrained(\"../models/opt-6.7b-qptq\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "d9fe945a-3379-4c11-a8c7-44d2dff92c53",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Chinese has many hero, so Chinese has many hero.\n",
      "Chinese has many hero, so Chinese has many hero.\n",
      "Chinese has many hero, so Chinese has many hero.\n",
      "Chinese has many hero, so Chinese has many hero.\n",
      "Chinese has many hero, so Chinese has many hero.\n",
      "Chinese has many hero, so Chinese has many hero.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)\n",
    "\n",
    "text = \"Chinese has many hero, so Chinese has\"\n",
    "inputs =tokenizer(text, return_tensors=\"pt\").to(0)\n",
    "out = quant_model.generate(**inputs, max_new_tokens=64)\n",
    "print(tokenizer.decode(out[0], skip_special_tokens=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "62321dd2-075e-49b0-9578-0666f27401ba",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "春节快乐，中国人需要一家人的爱情,我们需要一家人的爱情,我们需要一家人的爱情,我们需要一家人的爱\n"
     ]
    }
   ],
   "source": [
    "text = \"春节快乐，中国人需要一家人\"\n",
    "inputs =tokenizer(text, return_tensors=\"pt\").to(0)\n",
    "out = quant_model.generate(**inputs, max_new_tokens=64)\n",
    "print(tokenizer.decode(out[0], skip_special_tokens=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "867a0668-bae7-4f82-b79d-61cfbcf84975",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "春节快乐，广东人系春节期间要吃年糕，一家人吃饭，拜年，年初一糕，年初一糕，年初一糕，年初一糕，年初一糕，年初一糕，年�\n"
     ]
    }
   ],
   "source": [
    "text = \"春节快乐，广东人系春节期间要吃年糕，一家人吃饭，拜年，年初一\"\n",
    "inputs =tokenizer(text, return_tensors=\"pt\").to(0)\n",
    "out = quant_model.generate(**inputs, max_new_tokens=64)\n",
    "print(tokenizer.decode(out[0], skip_special_tokens=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "6b2128ca-f21e-4cb5-baf3-23e69f58b226",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Guangdong people eat rice cake and family meals during the Chinese New Year. on the first day of the Chinese new year, the family will have a meal together.\n",
      "\n",
      "The family will have a meal together.\n",
      "\n",
      "The family will have a meal together.\n",
      "\n",
      "The family will have a meal together.\n",
      "\n",
      "The family will have a meal together.\n",
      "\n",
      "The family will have a meal together.\n",
      "\n",
      "The family will\n"
     ]
    }
   ],
   "source": [
    "text = \"Guangdong people eat rice cake and family meals during the Chinese New Year. on the first day of the Chinese new year\"\n",
    "inputs =tokenizer(text, return_tensors=\"pt\").to(0)\n",
    "out = quant_model.generate(**inputs, max_new_tokens=64)\n",
    "print(tokenizer.decode(out[0], skip_special_tokens=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "d8d14769-0eed-43eb-ab47-8d4df56e12e2",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
      "Loading checkpoint shards: 100%|████████████████████████████████████████████████████| 2/2 [00:05<00:00,  2.61s/it]\n",
      "Quantizing model.decoder.layers blocks :   0%|                                             | 0/32 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.77s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.75s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.74s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.74s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.78s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:27<00:00,  6.55s/it]\n",
      "Quantizing model.decoder.layers blocks :   3%|█▏                                   | 1/32 [00:27<14:19, 27.73s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.75s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.74s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.75s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.75s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.80s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:27<00:00,  6.49s/it]\n",
      "Quantizing model.decoder.layers blocks :   6%|██▎                                  | 2/32 [00:55<13:49, 27.64s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.76s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.76s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.74s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.80s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:27<00:00,  6.50s/it]\n",
      "Quantizing model.decoder.layers blocks :   9%|███▍                                 | 3/32 [01:22<13:20, 27.61s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.76s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.74s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.72s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.79s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:27<00:00,  6.51s/it]\n",
      "Quantizing model.decoder.layers blocks :  12%|████▋                                | 4/32 [01:50<12:53, 27.62s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.70s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.71s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.79s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:27<00:00,  6.52s/it]\n",
      "Quantizing model.decoder.layers blocks :  16%|█████▊                               | 5/32 [02:18<12:25, 27.62s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.71s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.80s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:27<00:00,  6.53s/it]\n",
      "Quantizing model.decoder.layers blocks :  19%|██████▉                              | 6/32 [02:45<11:58, 27.64s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.70s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.71s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.72s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.78s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:27<00:00,  6.58s/it]\n",
      "Quantizing model.decoder.layers blocks :  22%|████████                             | 7/32 [03:13<11:32, 27.69s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.70s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.72s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.79s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:27<00:00,  6.56s/it]\n",
      "Quantizing model.decoder.layers blocks :  25%|█████████▎                           | 8/32 [03:41<11:05, 27.72s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.70s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.72s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.74s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.80s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:27<00:00,  6.60s/it]\n",
      "Quantizing model.decoder.layers blocks :  28%|██████████▍                          | 9/32 [04:09<10:38, 27.78s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.71s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.70s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.71s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.70s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.77s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:27<00:00,  6.51s/it]\n",
      "Quantizing model.decoder.layers blocks :  31%|███████████▎                        | 10/32 [04:36<10:09, 27.71s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.71s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.72s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.70s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.70s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.76s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:27<00:00,  6.56s/it]\n",
      "Quantizing model.decoder.layers blocks :  34%|████████████▍                       | 11/32 [05:04<09:41, 27.71s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.68s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.68s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.71s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.71s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.78s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:27<00:00,  6.66s/it]\n",
      "Quantizing model.decoder.layers blocks :  38%|█████████████▌                      | 12/32 [05:32<09:15, 27.80s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.71s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.79s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:28<00:00,  6.70s/it]\n",
      "Quantizing model.decoder.layers blocks :  41%|██████████████▋                     | 13/32 [06:00<08:50, 27.92s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.71s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.72s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.74s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.75s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.80s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:28<00:00,  6.73s/it]\n",
      "Quantizing model.decoder.layers blocks :  44%|███████████████▊                    | 14/32 [06:29<08:24, 28.03s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.70s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.71s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.80s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:28<00:00,  6.70s/it]\n",
      "Quantizing model.decoder.layers blocks :  47%|████████████████▉                   | 15/32 [06:57<07:57, 28.08s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:13,  2.72s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:10,  2.73s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.72s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:10<00:05,  2.74s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:13<00:02,  2.81s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:28<00:00,  6.75s/it]\n",
      "Quantizing model.decoder.layers blocks :  50%|██████████████████                  | 16/32 [07:25<07:30, 28.16s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.89s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.90s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.88s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.89s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.94s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.01s/it]\n",
      "Quantizing model.decoder.layers blocks :  53%|███████████████████▏                | 17/32 [07:55<07:08, 28.57s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.90s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.90s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.88s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.93s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  6.98s/it]\n",
      "Quantizing model.decoder.layers blocks :  56%|████████████████████▎               | 18/32 [08:24<06:43, 28.84s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.86s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.93s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.02s/it]\n",
      "Quantizing model.decoder.layers blocks :  59%|█████████████████████▍              | 19/32 [08:54<06:17, 29.05s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.86s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.86s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.88s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.93s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.01s/it]\n",
      "Quantizing model.decoder.layers blocks :  62%|██████████████████████▌             | 20/32 [09:23<05:50, 29.20s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.85s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.88s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.93s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  6.98s/it]\n",
      "Quantizing model.decoder.layers blocks :  66%|███████████████████████▋            | 21/32 [09:53<05:22, 29.28s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.80s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.85s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.86s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.85s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.93s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.00s/it]\n",
      "Quantizing model.decoder.layers blocks :  69%|████████████████████████▊           | 22/32 [10:22<04:53, 29.33s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.86s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.88s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.88s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.94s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.05s/it]\n",
      "Quantizing model.decoder.layers blocks :  72%|█████████████████████████▉          | 23/32 [10:52<04:24, 29.43s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.83s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.84s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.84s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.85s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.92s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.01s/it]\n",
      "Quantizing model.decoder.layers blocks :  75%|███████████████████████████         | 24/32 [11:21<03:55, 29.44s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.84s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.86s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.86s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.94s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.02s/it]\n",
      "Quantizing model.decoder.layers blocks :  78%|████████████████████████████▏       | 25/32 [11:51<03:26, 29.48s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.85s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.88s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.94s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.05s/it]\n",
      "Quantizing model.decoder.layers blocks :  81%|█████████████████████████████▎      | 26/32 [12:20<02:57, 29.53s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.82s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.86s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.85s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.86s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.92s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.05s/it]\n",
      "Quantizing model.decoder.layers blocks :  84%|██████████████████████████████▍     | 27/32 [12:50<02:27, 29.55s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.86s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.88s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.88s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.93s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.04s/it]\n",
      "Quantizing model.decoder.layers blocks :  88%|███████████████████████████████▌    | 28/32 [13:20<01:58, 29.57s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.86s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.88s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.94s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.06s/it]\n",
      "Quantizing model.decoder.layers blocks :  91%|████████████████████████████████▋   | 29/32 [13:49<01:28, 29.61s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.88s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.89s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.88s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.95s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.07s/it]\n",
      "Quantizing model.decoder.layers blocks :  94%|█████████████████████████████████▊  | 30/32 [14:19<00:59, 29.65s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.85s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.85s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.87s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.86s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.92s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.03s/it]\n",
      "Quantizing model.decoder.layers blocks :  97%|██████████████████████████████████▉ | 31/32 [14:49<00:29, 29.62s/it]\n",
      "\u001b[Antizing layers inside the block:   0%|                                                   | 0/6 [00:00<?, ?it/s]\n",
      "\u001b[Antizing layers inside the block:  17%|███████▏                                   | 1/6 [00:02<00:14,  2.83s/it]\n",
      "\u001b[Antizing layers inside the block:  33%|██████████████▎                            | 2/6 [00:05<00:11,  2.83s/it]\n",
      "\u001b[Antizing layers inside the block:  50%|█████████████████████▌                     | 3/6 [00:08<00:08,  2.84s/it]\n",
      "\u001b[Antizing layers inside the block:  67%|████████████████████████████▋              | 4/6 [00:11<00:05,  2.85s/it]\n",
      "\u001b[Antizing layers inside the block:  83%|███████████████████████████████████▊       | 5/6 [00:14<00:02,  2.91s/it]\n",
      "\u001b[Antizing layers inside the block: 100%|███████████████████████████████████████████| 6/6 [00:29<00:00,  7.02s/it]\n",
      "Quantizing model.decoder.layers blocks : 100%|████████████████████████████████████| 32/32 [15:18<00:00, 28.71s/it]\n",
      "/home/david/anaconda3/envs/peft/lib/python3.10/site-packages/transformers/modeling_utils.py:5055: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "from transformers import AutoModelForCausalLM, GPTQConfig, AutoTokenizer\n",
    "\n",
    "model_name_or_path = \"facebook/opt-6.7b\"\n",
    "custom_dataset = [\"春节习俗，是与春节这一节日相关的各种习俗活动的总和，包括祭祀、拜年、娱乐、饮食等多方面。中国社会在几千年的发展中按时定期地强化中华民族的共同记忆与情感，形成了春节习俗。\",\n",
    "\"春节习俗可以分为年前习俗和节期习俗。年前习俗一般是从腊月二十三开始，一直到除夕，如祭灶官、扫房子、拐豆腐、割块肉、杀年鸡、蒸枣花、蒸馒头、贴门齐等；\",\n",
    " \"春节期习俗则是正月初一到正月十五之间的习俗，如拜年、舞龙舞狮、拜神祭祖、放鞭炮、开市、庙会、赏花灯、吃元宵等。\",\n",
    "\"春节习俗萌芽于先秦时期，起源于人们对农业生产周期的掌握和对自然的敬畏，与原始宗教、祭祀活动密切相关，如傩祭、立桃梗等活动，反映了人们祈求平安、驱邪避灾的愿望 \",\n",
    "\"定型于汉魏时期，确定了以正月为岁首的活动，如贴门神、挂桃符、驱傩、放爆竹等，同时儒家思想开始渗透到春节习俗中，出现了祭祀祖先、依次拜贺等伦理活动 。\",\n",
    "\"发展于唐宋时期，活动更加丰富多样，如守岁、立门神、挂年画、贴春帖、拜年、元宵节等，同时随着对外交流的频繁，佛教、道教等文化元素也融入到春节习俗中，出现了元宵灯节、佛教燃灯等活动，春节习俗更加多元化 。\",\n",
    "\"春节习俗随着中华民族的历史逐步完善成型，成为了中华民族的独特标志，对于中华民族的民众而言，春节习俗就是一种共同的文化记忆。\",\n",
    "\"春节习俗承担着传承着中华优秀美德的功能，对于每一个中华儿女来说，春节习俗是凝聚中华民族重要的纽带，连接着全世界中华儿女，迸发出最强劲的民族向心力 \",\n",
    "\"春节习俗起源于远古时代，与农业生产和自然周期密切相关。在先秦时期，逐渐出现有记载的春节习俗活动，具有浓厚的宗教色彩和功利性. \",\n",
    " \"年末廿三或廿四日，在民间称为“小年”，从小年起人们便开始“忙年”了。小年并非专指一个日子，由于各地风俗，被称为“小年”的日子也不尽相同。\",\n",
    "\"小年期间主要的民俗活动有扫尘、祭灶等。在清朝之前，民间传统的小年祭灶日是腊月二十四。\",\n",
    "\"从清朝中后期开始，帝王家就于腊月二十三举行祭天大典，为了“节省开支”，顺便把灶王爷也给拜了，因此北方地区民间百姓相效仿，多在腊月二十三过小年。\",\n",
    "\"南方大部分地区，仍然保持着腊月二十四过小年的传统。  \",\n",
    "\"年前忙年主要是以除旧布新为活动主题，扫尘是年前除旧布新习俗之一。民谚称“腊月二十四，掸尘扫房子”。年末廿三/廿四便正式地开始做迎接过年的准备。\",\n",
    "\"扫尘就是年终大扫除，北方称“扫房”，南方称“扫屋”。每逢春节来临，家家户户都要打扫环境，清洗各种器具，拆洗被褥窗帘，洒扫六闾庭院，掸拂尘垢蛛网，疏浚明渠暗沟。\",\n",
    "\"到处洋溢着欢欢喜喜搞卫生、干干净净迎新春的欢乐氛围。\"]\n",
    "\n",
    "custom_quantization_config = GPTQConfig(\n",
    "    bits=4,\n",
    "    group_size=128,\n",
    "    desc_act=False,\n",
    "    dataset=custom_dataset\n",
    ")\n",
    "\n",
    "custom_quant_model = AutoModelForCausalLM.from_pretrained(model_name_or_path,\n",
    "                                                          quantization_config=custom_quantization_config,\n",
    "                                                          torch_dtype=torch.float16,\n",
    "                                                          device_map=\"auto\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "500c8ac9-962a-4802-8e1d-3ae19ce5897a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "中国人在春节的除夕习俗是被抓起了一个政治的结果。\n",
      "\n",
      "                                  \n"
     ]
    }
   ],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)\n",
    "text = \"中国人在春节的除夕习俗是\"\n",
    "inputs = tokenizer(text, return_tensors=\"pt\").to(0)\n",
    "\n",
    "out = custom_quant_model.generate(**inputs, max_new_tokens=64)\n",
    "print(tokenizer.decode(out[0], skip_special_tokens=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "0aca3cce-1b06-45e5-a1ef-08868562fe63",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "中国人在春节有那些习俗的论坛,我们都不知道你们都是中国人,你们都是中国人。\n",
      "中国人都是中国人,你们都是\n"
     ]
    }
   ],
   "source": [
    "text = \"中国人在春节有那些习俗\"\n",
    "inputs = tokenizer(text, return_tensors=\"pt\").to(0)\n",
    "\n",
    "out = custom_quant_model.generate(**inputs, max_new_tokens=64)\n",
    "print(tokenizer.decode(out[0], skip_special_tokens=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9297a676-2f6c-462d-ba3b-86eac6c787dd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
